function [] = ur_analysis(varargin)
%UR_ANALYSIS  Walk an experiment directory tree and run the summary analysis.
%Syntax:    ur_analysis('dir_tmp',path,'vol_idx',2,'int_loc',[1,3],'exp_norm',0,'full',0);
%Input:     (name/value pairs, all optional)
%           dir_tmp = the directory of interest.  It must contain
%               'output_lookup.csv'.  If it is not given the parser asks
%               for it.
%           vol_idx = the column where the volume is, if not specified it
%               defaults to 2, but this default only works for synapse
%               properties not total properties, because total properties
%               for now also contain the x, y and z of the puncta.
%           int_loc = the column locations of the intensity measurements.
%               We want this info because intensity measurements need to
%               be transformed and normalized.  Default = [1,3], again not
%               set for total data.  Note: in log normalized data, the
%               volume and distance parameters will only be normalized,
%               not log transformed.
%           exp_norm = normalize exposure, default = 0 (off), 1 = global
%               max, 2 = global min, 3 = paired max, 4 = paired min.  A
%               vector runs several modes, e.g. [1,3] = global max and
%               paired max.
%           full = run all of the analysis instead of just the properties.
%               Default = 0: only the collated properties summary is
%               generated, no joining of vertices and rotated points.
%Output:    none.  All results are produced by the helper functions called
%               from here.
%
%Directory layout walked (names taken from the original comments):
%   dir_tmp / <root_dir> / <two_dir: KO vs WT> / <three_dir: Layer4, Layer5>
%           / <four_dir: aflat, flat, rotated, aprop, prop, ...>
%           / <five_dir: joined, joined_norm, ...> / files

[dir_tmp,vol_idx,int_loc,exp,minmax,minormax,exp_norm,full] = parse(varargin);

%grab the lookup table for later.  The handle is validated and always
%closed, also when textscan fails.
lookup_file = [dir_tmp,filesep,'output_lookup.csv'];
fid = fopen(lookup_file);
if fid==-1
    error(['Could not open the lookup table: ',lookup_file]);
end
try
    txt_tmp = textscan(fid,'%s%s%s%s%s%s','delimiter',',','HeaderLines',1);
catch read_err
    fclose(fid);
    rethrow(read_err);
end
fclose(fid);
out_lookup = [txt_tmp{1},txt_tmp{2},txt_tmp{3},txt_tmp{4},txt_tmp{5},txt_tmp{6}];

%lets process the root
root_dir = ur_list_subdirs(dir_tmp);    %the directory names in root
for i = 1:numel(root_dir)
    allprop = [];
    root_path = [dir_tmp,filesep,root_dir{i}];
    two_dir = ur_list_subdirs(root_path);   %KO vs WT.
    for j = 1:numel(two_dir)
        two_path = [root_path,filesep,two_dir{j}];
        three_dir = ur_list_subdirs(two_path);  %Layer4, Layer5
        for k = 1:numel(three_dir)     %now is the time for analysis
            three_path = [two_path,filesep,three_dir{k}];
            four_dir = ur_list_subdirs(three_path);
            %now parse to run
            for l = 1:numel(four_dir)
                four_path = [three_path,filesep,four_dir{l}];
                display(['Working on: ',filesep,root_dir{i},filesep,two_dir{j},filesep,three_dir{k},filesep,four_dir{l},filesep]);
                switch four_dir{l}
                    case {'aflat','flat','rotated'}  %join these vertices
                        if full
                            try     %might error out due to empty directories
                                join_verts_loco([four_path,filesep]);
                                join_verts_norm_loco([four_path,filesep]);
                            catch
                                %deliberately best effort: skip this folder
                            end
                        end
                    case {'aprop','prop'}   %join these vertices and go deeper
                        if full
                            join_verts_loco([four_path,filesep],1,int_loc);
                            join_verts_norm_loco([four_path,filesep],1,int_loc);
                            %go deeper for summary data: into joined, joined_norm, ...
                            five_dir = ur_list_subdirs(four_path);
                            for m = 1:numel(five_dir)
                                five_path = [four_path,filesep,five_dir{m}];
                                %now grab all the files
                                dir_six = dir(five_path);
                                names_tmp = {dir_six.name};
                                six_files = names_tmp(~[dir_six.isdir]);    %the filenames
                                %now process properties
                                prop_sum2(six_files,[five_path,filesep],vol_idx);
                                %list the folder again (after prop_sum2 has
                                %run) and join the summary files
                                dir_six = dir(five_path);
                                names_tmp = {dir_six.name};
                                six_files = names_tmp(~[dir_six.isdir]);
                                %keep only the files whose name starts with
                                %'sum'; strncmp is safe for names shorter
                                %than three characters
                                filenames = six_files(strncmp('sum',six_files,3));
                                %now join the summary
                                join_sum(filenames,[five_path,filesep],1);
                                join_props_loco([four_path,filesep],int_loc);
                            end
                        end
                        %now run sum_collate to generate collated summary
                        %files
                        [total_prop,dataset_name{j,k}] = sum_collate_loco([four_path,filesep],vol_idx,int_loc);
                end
            end
            %currently R1, R2, etc. are not considered in the analysis use: allprop.(two_dir{j}).(three_dir{k}).(four_dir{l}) = total_prop;  If needed in the future we will make R1 R2 work.
            %NOTE(review): total_prop is whatever the last 'aprop'/'prop'
            %folder produced; if this layer has no such folder it is
            %undefined or left over from an earlier layer - confirm intent.
            allprop.(two_dir{j}).(three_dir{k}) = total_prop;
        end
    end
    %process allprop
    process_allprop(allprop,[dir_tmp,filesep,root_dir{i}],dataset_name,'quantile','sum_collated_quantile');
    for o = 1:size(exp_norm,2)
        switch exp_norm(o)
            case {3,4}  %paired data
                [allprop_norm_paired] = exposure_pairnorm(allprop,exp,out_lookup,dataset_name,int_loc,root_dir{i},minormax{o});
                process_allprop(allprop_norm_paired,[dir_tmp,filesep,root_dir{i}],dataset_name,'none','sum_collated_exp_norm_paired');
            case {1,2}
                [allprop_norm_global] = exposure_norm(allprop,exp,minmax,out_lookup,dataset_name,int_loc,root_dir{i},minormax{o});
                process_allprop(allprop_norm_global,[dir_tmp,filesep,root_dir{i}],dataset_name,'none','sum_collated_exp_norm_global');
        end
    end
end
%--------------------------------------------------------------------------
%subfunction to list sub-directories.
function [names] = ur_list_subdirs(parent)
%Return the names of the directories inside PARENT as a cell array.  The
%'.' and '..' entries are removed by name, because dir does not promise
%that they are the first two entries of the listing.
listing = dir(parent);
names = {listing.name};
names = names(logical([listing.isdir]));
names = names(~ismember(names,{'.','..'}));
%--------------------------------------------------------------------------
%subfunction to parse the inputs.
function [dir_tmp,vol_idx,int_loc,exp,minmax,minormax,exp_norm,full] = parse(input)
%Parse the name/value inputs of ur_analysis and fill in the defaults.
%Input:     input = the varargin cell of the caller: 'name',value,...
%               Recognized names: dir_tmp, vol_idx, int_loc, exp_norm, full.
%               Unknown names raise a warning and are ignored.
%Output:    dir_tmp = data directory (asked for with uigetdir2 when empty)
%           vol_idx = volume column, default 2
%           int_loc = intensity columns, default [1,3]
%           exp,minmax = the outputs of exposure_collate, [] when exposure
%               normalization is off
%           minormax = cell with 'max' (codes 1,3) or 'min' (codes 2,4) for
%               each entry of exp_norm, [] when exposure normalization is off
%           exp_norm = exposure normalization code(s), default 0 (off)
%           full = run the full analysis, default 0

dir_tmp = [];
vol_idx = 2;
int_loc = [1,3];
exp = [];
minmax = [];
exp_norm = 0;
minormax = [];
full = 0;

%Parse the input
if ~isempty(input)
    %a name without a value used to fail with an index error; report it as
    %the parameter problem it is
    if mod(numel(input),2)~=0
        error(['The parameters you entered is incorrect.  Please check help.']);
    end
    for i = 1:2:size(input,2)
        if ischar(input{1,i})
            switch input{1,i}
                case 'dir_tmp'
                    dir_tmp = input{1,i+1};
                case 'vol_idx'
                    vol_idx = input{1,i+1};
                case 'int_loc'
                    int_loc = input{1,i+1};
                case 'exp_norm'
                    exp_norm = input{1,i+1};
                case 'full'
                    full = input{1,i+1};
                otherwise
                    warning(['Your input ',input{1,i},' is not recognized.']);
            end
        else
            error(['The parameters you entered is incorrect.  Please check help.']);
        end
    end
end

%where are the files?
if isempty(dir_tmp)
    dir_tmp = uigetdir2('','Directory where the files are located');    %get the directory
end
%if you want to normalize exposure...
%any() so that a vector such as [0,1] still triggers the setup; a bare
%"if exp_norm~=0" is only true when every element is nonzero.
if any(exp_norm(:)~=0)
    [exp,minmax] = exposure_collate;
    %one slot per requested mode so the caller can always index minormax{o}
    minormax = cell(1,size(exp_norm,2));
    for j = 1:size(exp_norm,2)
        switch exp_norm(j)
            case {1,3}
                minormax{j} = 'max';
            case {2,4}
                minormax{j} = 'min';
        end
    end
end

%----------------------------------------------------------------------------------------------------
function [output] = join_verts_loco(dir_tmp,norm_prop,int_loc)
%This function goes through a directory of vertex files and joins each
%group of files (as grouped by dir_sort) into one file.  It works for
%non-vertex files as well, just make sure they have the same number of
%columns.  Files must be of the format s6s7(punct)thekdj.csv or
%s6s7(punct)thekdj(punct)46573.csv
%Syntax:    [output] = join_verts_loco(dir_tmp,norm_prop,int_loc);
%Input:     dir_tmp = the directory of interest; asked for when missing or
%               empty
%           norm_prop = 1 also writes the 'joined_log' and 'joined_median'
%               versions.  Default = 0 (off)
%           int_loc = the columns that are log2 transformed for the
%               'joined_log' version.  Default = [1,3]
%Output:    output = the joined data set.  Structure array with fields:
%               data (plus qdata and mdata when norm_prop is on) for each
%               file group, and names = the group names, stored in
%               output(1).
%Files written: <group>_<rows>.csv in dir_tmp/joined (and joined_log,
%               joined_median when norm_prop is on).

if nargin==0||isempty(dir_tmp)
    prompt_box('title','Open File Location','prompt1','Select the vertex lists you want to combine.','position','center');
    pause(0.25);
    dir_tmp = uigetdir2('','Directory where the files are located');    %get the directory
end
if nargin<2
    norm_prop = 0;   %off
end
if nargin<3
    int_loc = [1,3];    %same default as the main parser
end
%Now sort through the directory and find the file groups
[filenames,names] = dir_sort(dir_tmp);
%Now open each file append and save
mkdir(dir_tmp,'joined');
if norm_prop
    mkdir(dir_tmp,'joined_log');
    mkdir(dir_tmp,'joined_median');
end
for j = 1:size(filenames,2) %go through the file sets
    filename_tmp = filenames{j};    %char matrix, one file name per row
    data = [];  %initialize/reset
    if norm_prop
        qdata = [];  %initialize/reset
        mdata = [];  %initialize/reset
    end
    for i = 1:size(filename_tmp,1)      %now append the files
        %reset for every file: without this a failed read would append the
        %rows of the previous file a second time
        verti_tmp = [];
        try     %file could be empty
            verti_tmp = single(dlmread([dir_tmp,filesep,filename_tmp(i,:)],',',1,0));
        catch
            %best effort: an unreadable file contributes no rows
        end
        data = vertcat(data,verti_tmp);       %join all the opened data together
        if norm_prop && ~isempty(verti_tmp)
            mverti_tmp = manorm(verti_tmp,'Method','median','LogData',0);
            qverti_tmp = verti_tmp;
            for o = 1:size(int_loc,2)   %log2 only the intensity columns
                qverti_tmp(:,int_loc(o)) = log2(verti_tmp(:,int_loc(o)));
            end
            qdata = vertcat(qdata,qverti_tmp);
            mdata = vertcat(mdata,mverti_tmp);
        end
    end
    if ~isempty(data)   %if not empty proceed
        %now save out the data file, the row count is part of the name
        dataout = dataset(data);
        sav2csv(dataout,[names{j,1},'_',num2str(size(data,1)),'.csv'],[dir_tmp,filesep,'joined']);
        output(j).data = data;
        if norm_prop
            qdataout = dataset(qdata);
            sav2csv(qdataout,[names{j,1},'_',num2str(size(qdata,1)),'.csv'],[dir_tmp,filesep,'joined_log']);
            output(j).qdata = qdata;
            mdataout = dataset(mdata);
            sav2csv(mdataout,[names{j,1},'_',num2str(size(mdata,1)),'.csv'],[dir_tmp,filesep,'joined_median']);
            output(j).mdata = mdata;
        end
    end
end
output(1).names = names;
%----------------------------------------------------------------------------------------------------
function [output] = join_verts_norm_loco(dir_tmp,norm_prop,int_loc)
%This function joins each group of vertex files (as grouped by dir_sort)
%into one file, like join_verts_loco, but first makes all the datasets of
%a group the same size: it looks at all the non-empty files, picks the
%lowest row count, and extracts a random subset of that many rows from the
%larger datasets.  This is for the occasion when some datasets are way
%bigger than others.  In practice this is not very useful.
%Syntax:    [output] = join_verts_norm_loco(dir_tmp,norm_prop,int_loc);
%Input:     dir_tmp = the directory of interest; asked for when missing or
%               empty
%           norm_prop = 1 also writes the 'joined_norm_log' and
%               'joined_norm_median' versions.  Default = 0 (off)
%           int_loc = the columns that are log2 transformed for the log
%               version.  Default = [1,3]
%Output:    output = the joined data set.  Structure array with fields:
%               data (plus qdata and mdata when norm_prop is on) for each
%               file group, and names = the group names, stored in
%               output(1).
%Files written: <group>_<rows>.csv in dir_tmp/joined_norm (and
%               joined_norm_log, joined_norm_median when norm_prop is on).

if nargin==0||isempty(dir_tmp)    %no input
    prompt_box('title','Open File Location','prompt1','Select the vertex lists you want to combine.','position','center');
    pause(0.25);
    dir_tmp = uigetdir2('','Directory where the files are located');    %get the directory
end
if nargin<2
    norm_prop = 0;   %off
end
if nargin<3
    int_loc = [1,3];    %same default as the main parser
end
%Now sort through the directory and find the file groups
[filenames,names] = dir_sort(dir_tmp);
%Now open each file append and save
mkdir(dir_tmp,'joined_norm');
if norm_prop
    mkdir(dir_tmp,'joined_norm_log');
    mkdir(dir_tmp,'joined_norm_median');
end
for j = 1:size(filenames,2) %go through the file sets
    curr_filenames = filenames{j};  %char matrix, one file name per row
    file_count = size(curr_filenames,1);
    data = [];  %initialize/reset
    if norm_prop
        qdata = [];  %initialize/reset
        mdata = [];  %initialize/reset
    end
    %one slot per file, rebuilt for every group; a slot that stays empty
    %marks a file that was empty or could not be read
    data_tmp = struct('verts',cell(1,file_count),'qverts',[],'mverts',[]);
    min_size = 0;   %smallest non-empty row count, 0 = none seen yet
    for i = 1:file_count      %read the files
        verti_tmp = [];     %reset so a failed read cannot reuse the previous file
        try     %file could be empty
            verti_tmp = single(dlmread([dir_tmp,filesep,curr_filenames(i,:)],',',1,0));
        catch
            %best effort: an unreadable file contributes no rows
        end
        if isempty(verti_tmp)
            continue    %empty files must not pull min_size down to zero
        end
        if min_size==0||size(verti_tmp,1)<min_size
            min_size = size(verti_tmp,1);   %get the smallest number in the bunch
        end
        data_tmp(i).verts = verti_tmp;  %store the data in a structure
        if norm_prop
            qverti_tmp = verti_tmp;
            for o = 1:size(int_loc,2)   %log2 only the intensity columns
                qverti_tmp(:,int_loc(o)) = log2(verti_tmp(:,int_loc(o)));
            end
            data_tmp(i).qverts = qverti_tmp;
            data_tmp(i).mverts = manorm(verti_tmp,'Method','median','LogData',0);
        end
    end
    %now extract a random set if the data is larger
    for k = 1:file_count
        verti_tmp = data_tmp(k).verts;  %grab the dataset
        if isempty(verti_tmp)  %if empty skip
            continue
        end
        row_count = size(verti_tmp,1);
        if row_count~=min_size
            %*********************Randomize and Pullout***************************
            %sort random keys and keep the first min_size positions; the
            %same row selection is then used for all three versions so they
            %stay aligned row by row
            [sorted_keys,order] = sort(rand(row_count,1));
            keep = order(1:min_size);
        else
            keep = (1:row_count)';  %already the minimum size, keep as is
        end
        data = vertcat(data,verti_tmp(keep,:));       %join all the opened data together
        if norm_prop
            %always take the versions that belong to this file; they used
            %to be refreshed only when the file was subsampled
            qdata = vertcat(qdata,data_tmp(k).qverts(keep,:));
            mdata = vertcat(mdata,data_tmp(k).mverts(keep,:));
        end
    end
    if ~isempty(data)
        %remove the NaN
        data(isnan(data(:,1)),:) = [];
        %now save out the data file, the row count is part of the name
        dataout = dataset(data);
        sav2csv(dataout,[names{j,1},'_',num2str(size(data,1)),'.csv'],[dir_tmp,filesep,'joined_norm']);
        output(j).data = data;
        if norm_prop
            qdata(isnan(qdata(:,1)),:) = [];
            qdataout = dataset(qdata);
            sav2csv(qdataout,[names{j,1},'_',num2str(size(qdata,1)),'.csv'],[dir_tmp,filesep,'joined_norm_log']);
            output(j).qdata = qdata;
            mdata(isnan(mdata(:,1)),:) = [];
            mdataout = dataset(mdata);
            sav2csv(mdataout,[names{j,1},'_',num2str(size(mdata,1)),'.csv'],[dir_tmp,filesep,'joined_norm_median']);
            output(j).mdata = mdata;
        end
    end
end
output(1).names = names;
%----------------------------------------------------------------------------------------------------
function [output] = join_props_loco(dir_tmp,int_loc)
%This function is a lot like join_verts_loco except the joining is
%horizontal, and is per column: for every file group (as grouped by
%dir_sort) and every property column one table is written, with one
%column per file, padded with NaN to the longest file of the group.
%Syntax:    [output] = join_props_loco(dir_tmp,int_loc);
%Input:     dir_tmp = the directory of interest; asked for when the
%               function is called without inputs
%           int_loc = the columns that are log2 transformed for the
%               'joined_prop_log' version.  Default = [1,3]
%Output:    output = structure array with, per file group, the fields data,
%               qdata (log version) and mdata (median normalized version),
%               each a cell array {file,column}; names = the group names,
%               stored in output(1).
%Files written: <group>_prop<column>.csv in dir_tmp/joined_prop,
%               joined_prop_log and joined_prop_median.

if nargin==0
    prompt_box('title','Open File Location','prompt1','Select the vertex lists you want to combine.','position','center');
    pause(0.25);
    dir_tmp = uigetdir2('','Directory where the files are located');    %get the directory
end
if nargin<2
    int_loc = [1,3];    %same default as the main parser
end
%Now sort through the directory and find the file groups
[filenames,names] = dir_sort(dir_tmp);
%Now open each file append and save
warning('OFF','last');
mkdir(dir_tmp,'joined_prop');
mkdir(dir_tmp,'joined_prop_log');
mkdir(dir_tmp,'joined_prop_median');
for j = 1:size(filenames,2) %go through the file sets
    filename_tmp = filenames{j};    %char matrix, one file name per row
    file_count = size(filename_tmp,1);
    %reset everything per group; the normalized cells and the row counts
    %used to leak from one group into the next
    data = {};
    qdata = {};
    mdata = {};
    arraysize = zeros(1,file_count);
    for i = 1:file_count      %now read the files
        try     %file could be empty
            prop_tmp = single(dlmread([dir_tmp,filesep,filename_tmp(i,:)],',',1,0));
            %normalization
            mprop_tmp = manorm(prop_tmp,'Method','median','LogData',0);
            qprop_tmp = prop_tmp;
            for o = 1:size(int_loc,2)   %log2 only the intensity columns
                qprop_tmp(:,int_loc(o)) = log2(prop_tmp(:,int_loc(o)));
            end
            arraysize(i) = size(prop_tmp,1);
            for k = 1:size(prop_tmp,2)  %step through columns
                data{i,k} = prop_tmp(:,k);
                %normalized data
                qdata{i,k} = qprop_tmp(:,k);
                mdata{i,k} = mprop_tmp(:,k);
            end
        catch
            %best effort: a file that cannot be read or normalized is
            %skipped and its column stays NaN
        end
    end
    if ~isempty(data)   %make sure there is data first
        max_rows = max(arraysize);
        for m = 1:size(data,2)  %one output table per property column
            data_tmp = nan(max_rows,file_count);
            %norm data
            qdata_tmp = nan(max_rows,file_count);
            mdata_tmp = nan(max_rows,file_count);
            for l = 1:size(data,1)
                if isempty(data{l,m})
                    continue    %skipped file or missing column: leave NaN
                end
                data_tmp(1:size(data{l,m},1),l) = data{l,m};
                %norm data
                qdata_tmp(1:size(qdata{l,m},1),l) = qdata{l,m};
                mdata_tmp(1:size(mdata{l,m},1),l) = mdata{l,m};
            end
            %now save out the data file
            dataout = dataset(data_tmp);
            sav2csv(dataout,[names{j,1},'_prop',num2str(m),'.csv'],[dir_tmp,filesep,'joined_prop']);
            %norm data
            qdataout = dataset(qdata_tmp);
            sav2csv(qdataout,[names{j,1},'_prop',num2str(m),'.csv'],[dir_tmp,filesep,'joined_prop_log']);
            mdataout = dataset(mdata_tmp);
            sav2csv(mdataout,[names{j,1},'_prop',num2str(m),'.csv'],[dir_tmp,filesep,'joined_prop_median']);
        end
        output(j).data = data;
        output(j).qdata = qdata;
        output(j).mdata = mdata;
    end
end
output(1).names = names;
%----------------------------------------------------------------------------------------------------
function [prop_struct,dataset_name,output] = sum_collate_loco(dir_tmp,idx,int_loc)
%This function will go through a set of properties files, calculates
%properties for each file, sort files into groups (based on same name) and
%output each group as a single file.  Should be used in conjunction with
%join_verts.
%Synatax:   [data] = sum_collate(dir_tmp);
%           idx = the property column to use as the filter group
%               use sum_collate([],idx) to call with index only
%Input:     dir_tmp = the directory of interest
%Output:    output = the data set. Sturucture with fields: data = the
%               summary data from each file and name = the filenames in 
%               order.

%Now sort through the directory and find the
[filenames,names,size_array,headers] = dir_sort(dir_tmp);
%get the largest number of files groups
max_size = max(size_array);
data_col = [];  %initialize array
qdata_col = [];
mdata_col = [];
col_name = [];
filecount = 0;

%Now open each file append and save
mkdir(dir_tmp,'sum_collated');
mkdir(dir_tmp,'sum_collated_log');
mkdir(dir_tmp,'sum_collated_median');
for j = 1:size(filenames,2) %go through the file sets
    filename_tmp = filenames{j};
    data = [];  %initialize/reset
    data_col_tmp = [];
    qdata = [];  %initialize/reset
    qdata_col_tmp = [];
    mdata = [];  %initialize/reset
    mdata_col_tmp = [];
    col_name_tmp = {[names{j},'.ave'];[names{j},'.std'];[names{j},'.size'];[names{j},'.med'];[names{j},'.mad']};    %prepare the observation naming array 
    for i = 1:size(headers,1)      %for each file generate statistics
        %see if the header matches any of the files
        fname_tmp = [];
        for o = 1:size(filename_tmp,1)
            fname_tmp = strfind(filename_tmp(o,:),headers{i});
            if fname_tmp  %if we find a matching header break out
                fname_tmp = filename_tmp(o,:);
                break
            end
        end
        if ~isempty(fname_tmp)  %found a file
            try     %field could be empty
                prop_tmp = single(dlmread([dir_tmp,filesep,fname_tmp],',',1,0));    %open the file
            catch
                prop_tmp = [];  %it's empty
            end
        else
            prop_tmp = [];
        end
        if isempty(data_col_tmp)     %first time through generate temp for collated data
            %this is complicated but here goes
            if isempty(prop_tmp)&&i==1  %first time through and no data
                if isempty(data_col)    %no priors
                    col_num = 4;    %default for last resort
                    for n = i:size(filename_tmp,1)     %search the files for one that have data
                        strmask = isstrprop(strtrim(filename_tmp(n,:)),'punct');  %find the punctuations, we only want the last two
                        strmask2 = strtrim(filename_tmp(n,:))=='&';       %we are going to exempt the & character
                        strmask3 = isstrprop(strtrim(filename_tmp(n,:)),'wspace');    %included the spaces as well
                        strmask = strmask-strmask2+strmask3;     %removed and append
                        [x,y] = find(strmask==1);  %get the positions
                        if str2num(filename_tmp(n,y(end-1)+1:y(end)-1))>0   %file with data
                            fid = fopen([dir_tmp,filesep,filename_tmp(n,:)]);    %open the file
                            str = textscan(fid,'%s');   %grab the headings
                            str = str{1}{1};   %convert to string
                            col_num = find(str==',');   %assuming comma delimted files
                            col_num = size(col_num,2)+1;    %number of columns
                            break
                        end
                    end
                else    %there are priors
                    col_num = size(data_col,2);
                end
                data_col_tmp = zeros(5,col_num,max_size);   %there!
                %normalized datasets
                qdata_col_tmp = zeros(5,col_num,max_size);   %there!
                mdata_col_tmp = zeros(5,col_num,max_size);   %there!
            elseif isempty(prop_tmp)    %not the first time through
                data_col_tmp = zeros(5,size(data_col_tmp,2),max_size);
                %normalized datasets
                qdata_col_tmp = zeros(5,size(data_col_tmp,2),max_size);
                mdata_col_tmp = zeros(5,size(data_col_tmp,2),max_size);
            else
                data_col_tmp = zeros(5,size(prop_tmp,2),max_size);   %temporary array for holding properites data
                %normalized datasets
                qdata_col_tmp = zeros(5,size(prop_tmp,2),max_size);
                mdata_col_tmp = zeros(5,size(prop_tmp,2),max_size);
            end
        end
        if isempty(prop_tmp)   %don't process if empty
            prop_tmp = nan(1,size(data_col_tmp,2));
        end
        %the data matrix is 4 statistic by the number of properties by
        %number of instances of that channel or type
        %generate statistics
        %now find the outliers and remove the data
        %first calculate our criteria
        out = nanmedian(prop_tmp(:,idx))+std(prop_tmp(:,idx))*3;     %3 times the standard deviation should do it.
        [x,y] = find(prop_tmp(:,idx)>out);        %find the outliers.
        prop_tmp(x,:) = [];             %remove the outliers
        %now remove our NaN for size
        tmp = prop_tmp;
        tmp(isnan(prop_tmp(:,1)),:) = [];
        %do some math on the data
        data(1,:,i) = nanmean(prop_tmp,1);
        data(2,:,i) = nanstd(prop_tmp,1);
        data(3,:,i) = size(tmp,1);
        data(4,:,i) = nanmedian(prop_tmp,1);
        data(5,:,i) = mad(prop_tmp,1,1);
        %now create the collated dataset
        data_col_tmp(1,:,i) = nanmean(prop_tmp,1);
        data_col_tmp(2,:,i) = nanstd(prop_tmp,1);
        data_col_tmp(3,:,i) = size(tmp,1);
        data_col_tmp(4,:,i) = nanmedian(prop_tmp,1);
        data_col_tmp(5,:,i) = mad(prop_tmp,1,1);
        %normalize
        mprop_tmp = manorm(prop_tmp,'Method','median','LogData',0);
        qprop_tmp = prop_tmp;
        for o = 1:size(int_loc,2)
            qprop_tmp(:,int_loc(o)) = log2(prop_tmp(:,int_loc(o)));
            %qprop_tmp(:,int_loc(o)) = manorm(log2(prop_tmp(:,int_loc(o))),'Method','median','LogData',1);
        end
        %quantile norm
        qdata(1,:,i) = nanmean(qprop_tmp,1);
        qdata(2,:,i) = nanstd(qprop_tmp,1);
        qdata(3,:,i) = size(tmp,1);
        qdata(4,:,i) = nanmedian(qprop_tmp,1);
        qdata(5,:,i) = mad(qprop_tmp,1,1);
        qdata_col_tmp(1,:,i) = nanmean(qprop_tmp,1);
        qdata_col_tmp(2,:,i) = nanstd(qprop_tmp,1);
        qdata_col_tmp(3,:,i) = size(tmp,1);
        qdata_col_tmp(4,:,i) = nanmedian(qprop_tmp,1);
        qdata_col_tmp(5,:,i) = mad(qprop_tmp,1,1);
        %median norm
        mdata(1,:,i) = nanmean(mprop_tmp,1);
        mdata(2,:,i) = nanstd(mprop_tmp,1);
        mdata(3,:,i) = size(tmp,1);
        mdata(4,:,i) = nanmedian(mprop_tmp,1);
        mdata(5,:,i) = mad(mprop_tmp,1,1);
        mdata_col_tmp(1,:,i) = nanmean(mprop_tmp,1);
        mdata_col_tmp(2,:,i) = nanstd(mprop_tmp,1);
        mdata_col_tmp(3,:,i) = size(tmp,1);
        mdata_col_tmp(4,:,i) = nanmedian(mprop_tmp,1);
        mdata_col_tmp(5,:,i) = mad(mprop_tmp,1,1);
        %store all the properites for cross set normalization
        if i==1 %first time through
            total_prop = prop_tmp;
            %parse current channel
            fidx = strfind(filename_tmp(i,:),'_');
            bidx = strfind(filename_tmp(i,:),'.');
            ch_name = filename_tmp(i,fidx(end)+1:bidx-1);
        else %not the first now we need pad the dataset
            ptmp = prop_tmp;
            if size(total_prop,1)<size(prop_tmp,1)  %pad total_prop
                total_prop = vertcat(total_prop,nan(size(prop_tmp,1)-size(total_prop,1),size(prop_tmp,2),size(total_prop,3)));
            end
            if size(total_prop,1)>size(prop_tmp,1)  %pad prop_tmp
                ptmp = vertcat(prop_tmp,nan(size(total_prop,1)-size(prop_tmp,1),size(prop_tmp,2)));
            end
            total_prop(:,:,i) = ptmp;
        end
    end
    %save out total_prop in output structure
    if j==1  %initiate
        prop_struct = struct(ch_name,total_prop);
    else
        prop_struct.(ch_name) = total_prop;
    end
    %now save out the data file
    warning 'off'
    for k = 1:size(prop_tmp,2)     %step through the properties
        data_tmp = reshape(data(:,k,:),[5,size(data,3),1]);
        %filename_tmp = cellstr(filename_tmp);
        dataout = dataset({data_tmp,headers{:}});
        sav2csv(dataout,[names{j},'_col_sum.prop',num2str(k),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qdata_tmp = reshape(qdata(:,k,:),[5,size(qdata,3),1]);
        qdataout = dataset({qdata_tmp,headers{:}});
        sav2csv(qdataout,[names{j},'_col_sum.prop',num2str(k),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %normalized data-median
        mdata_tmp = reshape(mdata(:,k,:),[5,size(mdata,3),1]);
        mdataout = dataset({mdata_tmp,headers{:}});
        sav2csv(mdataout,[names{j},'_col_sum.prop',num2str(k),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    end
    output(j).data = data;
    %norm data
    output(j).qdata = qdata;
    output(j).mdata = mdata;
    %put collated data together
    data_col = vertcat(data_col,data_col_tmp);
    %normalized data
    qdata_col = vertcat(qdata_col,qdata_col_tmp);
    mdata_col = vertcat(mdata_col,mdata_col_tmp);
    col_name = vertcat(col_name,col_name_tmp);
%     if filecount<size(filename_tmp,1);  %give me the number of datasets in this dataset
%         filecount = size(filename_tmp,1);
%         filecountidx = j;   %filenames location
%     end
end
output(1).names = names;
%create the filenames array for the collated data
max_loc = find(size_array==max(size_array));    %locations of where you have all of the datasets present
filename_tmp = filenames{max_loc(1)};   %any of the max sets is fine, we'll just take the first
strmask = isstrprop(filename_tmp,'punct');  %find the punctuations
for m = 1:max_size
    [x,y] = find(strmask(m,:)==1);  %get the positions
    dataset_name{m,1} = filename_tmp(m,1:y(1)-1);    %grab the first punctuation deliminted characters, which should be unique
end

%save out the collated data
for l = 1:size(data_col,2)     %step through the properties
    col_cache = reshape(data_col(:,l,:),[size(data_col,1),size(data_col,3),1]);
    %norm data
    qcol_cache = reshape(qdata_col(:,l,:),[size(qdata_col,1),size(qdata_col,3),1]);
    mcol_cache = reshape(mdata_col(:,l,:),[size(mdata_col,1),size(mdata_col,3),1]);
    col_data = dataset({col_cache,dataset_name{:}},'obsname',col_name);
    sav2csv(col_data,['col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
    %norm data
    qcol_data = dataset({qcol_cache,dataset_name{:}},'obsname',col_name);
    sav2csv(qcol_data,['col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
    mcol_data = dataset({mcol_cache,dataset_name{:}},'obsname',col_name);
    sav2csv(mcol_data,['col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    %save a version of the file that is easier to collate manually, a
    %raw version without all of the statistics, averages only
    col_tmp = col_cache(1:5:end,:);
    col_name_tmp = col_name(1:5:end,:);
    col_data_tmp = dataset({col_tmp,dataset_name{:}},'obsname',col_name_tmp);
    sav2csv(col_data_tmp,['ave_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qcol_tmp = qcol_cache(1:5:end,:);
        qcol_data_tmp = dataset({qcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(qcol_data_tmp,['ave_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %median
        mcol_tmp = mcol_cache(1:5:end,:);
        mcol_data_tmp = dataset({mcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(mcol_data_tmp,['ave_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    %output std
    col_tmp = col_cache(2:5:end,:);
    col_name_tmp = col_name(2:5:end,:);
    col_data_tmp = dataset({col_tmp,dataset_name{:}},'obsname',col_name_tmp);
    sav2csv(col_data_tmp,['std_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qcol_tmp = qcol_cache(2:5:end,:);
        qcol_data_tmp = dataset({qcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(qcol_data_tmp,['std_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %median
        mcol_tmp = mcol_cache(2:5:end,:);
        mcol_data_tmp = dataset({mcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(mcol_data_tmp,['std_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    %output count
    col_tmp = col_cache(3:5:end,:);
    col_name_tmp = col_name(3:5:end,:);
    col_data_tmp = dataset({col_tmp,dataset_name{:}},'obsname',col_name_tmp);
    sav2csv(col_data_tmp,['count_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qcol_tmp = qcol_cache(3:5:end,:);
        qcol_data_tmp = dataset({qcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(qcol_data_tmp,['count_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %median
        mcol_tmp = mcol_cache(3:5:end,:);
        mcol_data_tmp = dataset({mcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(mcol_data_tmp,['count_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    %output median
    col_tmp = col_cache(4:5:end,:);
    col_name_tmp = col_name(4:5:end,:);
    col_data_tmp = dataset({col_tmp,dataset_name{:}},'obsname',col_name_tmp);
    sav2csv(col_data_tmp,['median_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qcol_tmp = qcol_cache(4:5:end,:);
        qcol_data_tmp = dataset({qcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(qcol_data_tmp,['median_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %median
        mcol_tmp = mcol_cache(4:5:end,:);
        mcol_data_tmp = dataset({mcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(mcol_data_tmp,['median_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
    %output mad
    col_tmp = col_cache(5:5:end,:);
    col_name_tmp = col_name(5:5:end,:);
    col_data_tmp = dataset({col_tmp,dataset_name{:}},'obsname',col_name_tmp);
    sav2csv(col_data_tmp,['mad_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated']);
        %normalized data-quantile
        qcol_tmp = qcol_cache(5:5:end,:);
        qcol_data_tmp = dataset({qcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(qcol_data_tmp,['mad_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_log']);
        %median
        mcol_tmp = mcol_cache(5:5:end,:);
        mcol_data_tmp = dataset({mcol_tmp,dataset_name{:}},'obsname',col_name_tmp);
        sav2csv(mcol_data_tmp,['mad_col_sum.prop',num2str(l),'.csv'],[dir_tmp,filesep,'sum_collated_median']);
end
warning 'on'
%----------------------------------------------------------------------------------------------------
function [allprop] = exposure_norm(allprop,exp,minmax,out_lookup,dataset_name,int_loc,class,minormax)
%Rescale the intensity columns of every channel array held in allprop by an
%exposure-derived factor.
%Input:     allprop = nested structure walked as
%               allprop.(condition).(layer).(channel); each leaf is indexed
%               here as (row, property column, experimental set)
%           exp = exposure structure, handed to find_exp
%           minmax = reference exposure table, handed to gen_normarray
%           out_lookup = lookup table, handed to lookup_date
%           dataset_name = cell array; dataset_name{i,j} holds the set
%               labels for the i-th condition and j-th layer
%           int_loc = column indices of the intensity measurements
%           class = class label handed to lookup_date
%           minormax = mode flag handed to gen_normarray
%Output:    allprop = the same structure with the intensity columns scaled
region_id = 'R1';   %only one region is handled for now
cond_names = fieldnames(allprop);  %first level, e.g., KO/WT or AM/PM
for ci = 1:size(cond_names,1)
    cond_id = cond_names{ci};
    layer_names = fieldnames(allprop.(cond_id));  %second level, e.g., Layer23, Layer4...
    for li = 1:size(layer_names,1)
        layer_id = layer_names{li};
        %labels of the experimental sets; entry n is paired with page n of
        %the channel array
        set_labels = dataset_name{ci,li};
        ch_names = fieldnames(allprop.(cond_id).(layer_id));  %channels, e.g., PSD95...
        for hi = 1:size(ch_names,1)
            ch_id = ch_names{hi};
            ch_data = allprop.(cond_id).(layer_id).(ch_id);
            for col = 1:size(int_loc,2)  %intensity columns only
                target_col = int_loc(col);
                for set_no = 1:size(ch_data,3)  %each experimental set
                    set_label = set_labels{set_no};
                    date_id = lookup_date(out_lookup,cond_id,layer_id,region_id,class,set_label);
                    set_exp = find_exp(exp,cond_id,layer_id,region_id,date_id,ch_id);
                    if isempty(set_exp)
                        continue  %no exposure recorded, leave this set untouched
                    end
                    scale = gen_normarray(minmax,ch_id,set_exp,size(ch_data,1),minormax);
                    ch_data(:,target_col,set_no) = ch_data(:,target_col,set_no).*scale;
                end
            end
            %hand the scaled array back
            allprop.(cond_id).(layer_id).(ch_id) = ch_data;
        end
    end
end
%----------------------------------------------------------------------------------------------------
function [allprop] = exposure_pairnorm(allprop,exp,out_lookup,dataset_name,int_loc,class,minormax)
%Normalize the intensity columns of each experimental set against the
%exposures of its paired partners (the same layer/channel/date in the other
%conditions) instead of a global max or min.
%Input:     allprop = nested structure walked as
%               allprop.(condition).(layer).(channel); each leaf is indexed
%               here as (row, property column, experimental set)
%           exp = exposure structure, handed to find_exp
%           out_lookup = lookup table, handed to lookup_date
%           dataset_name = cell array; dataset_name{1,j} holds the set
%               labels of the j-th layer of the FIRST condition, which
%               drives the pairing
%           int_loc = column indices of the intensity measurements
%           class = class label handed to lookup_date
%           minormax = mode flag handed to gen_normarray
%Output:    allprop = the same structure with paired sets rescaled
%Notes:     A set is left untouched when its date cannot be looked up, when
%           fewer than two conditions have an exposure for the channel, or
%           when all paired exposures are equal.  Problems are reported
%           with disp and the set is skipped; the function no longer drops
%           into the debugger (keyboard), which stalled batch runs and let
%           a stale date from the previous set be reused.
root_flds = fieldnames(allprop);  %first level, e.g., KO/WT or AM/PM
n_root = size(root_flds,1);
region_flds = 'R1';  %only one region is handled for now

second_flds = fieldnames(allprop.(root_flds{1}));  %second level, e.g., Layer23, Layer4...
for j = 1:size(second_flds,1)
    %labels of the experimental sets for this layer
    curr_setname = dataset_name{1,j};  %it is assumed that size(dataset_name{1,j},1) = size(data_tmp,3)
    ch_flds = fieldnames(allprop.(root_flds{1}).(second_flds{j}));  %channels, e.g., PSD95...
    for l = 1:size(ch_flds,1)
        for n = 1:size(curr_setname,1)  %go through each experimental set
            curr_dataset = curr_setname{n};
            %resolve the acquisition date of this set
            try
                date_id = lookup_date(out_lookup,root_flds{1},second_flds{j},region_flds,class,curr_dataset);
            catch err
                disp(['[',ch_flds{l},'] date lookup failed for ',root_flds{1},' ',second_flds{j},' ',region_flds,' ',curr_dataset,' (',err.message,'), set skipped']);
                continue
            end
            %collect the exposure of this channel in every condition
            curr_exp = cell(1,n_root);
            for i = 1:n_root
                curr_exp{i} = find_exp(exp,root_flds{i},second_flds{j},region_flds,date_id,ch_flds{l});
            end
            %a pair needs at least two conditions with an exposure; empty
            %entries are ignored so a channel missing from the first
            %condition no longer disables the remaining ones
            has_exp = ~cellfun('isempty',curr_exp);
            if sum(has_exp)<2
                continue
            end
            exp_vals = [curr_exp{has_exp}];  %assumes each exposure is a numeric scalar
            pair_max = max(exp_vals);
            pair_min = min(exp_vals);
            if pair_max==pair_min  %identical exposures, nothing to correct
                continue
            end
            %same {channel, max, min} row layout that gen_normarray reads
            minmax = {ch_flds{l},pair_max,pair_min};
            for o = 1:n_root
                if ~has_exp(o)  %no such channel in this condition, skip
                    continue
                end
                data_tmp = allprop.(root_flds{o}).(second_flds{j}).(ch_flds{l});
                %the factor does not depend on the column, build it once
                norm_array = gen_normarray(minmax,ch_flds{l},curr_exp{o},size(data_tmp,1),minormax);
                for m = 1:size(int_loc,2)  %intensity columns only
                    try
                        data_tmp(:,int_loc(m),n) = data_tmp(:,int_loc(m),n).*norm_array;
                    catch err
                        %e.g., this condition has fewer sets or columns
                        disp(['[',ch_flds{l},'] could not normalize set ',num2str(n),' column ',num2str(int_loc(m)),' in ',root_flds{o},' ',second_flds{j},': ',err.message]);
                    end
                end
                %return the normalized data
                allprop.(root_flds{o}).(second_flds{j}).(ch_flds{l}) = data_tmp;
            end
        end
    end
end


%----------------------------------------------------------------------------------------------------
function norm_array = gen_normarray(minmax,curr_ch,curr_exp,array_sz,minormax)
%Build the column of scale factors used to normalize one channel.
%Input:     minmax = cell table, one row per channel:
%               {channel name, second column, third column}; the second
%               column is the reference used in the default (max) mode and
%               the third the reference used in 'min' mode
%           curr_ch = channel name to look up (case insensitive)
%           curr_exp = exposure of the data being normalized
%           array_sz = number of rows of the returned column
%           minormax = 'min' gives curr_exp/minmax{row,3}; anything else
%               gives minmax{row,2}/curr_exp
%Output:    norm_array = array_sz-by-1 array filled with the scale factor
%Errors:    ur_analysis:gen_normarray:channelNotFound when curr_ch has no
%           row in minmax (previously this surfaced as an unassigned
%           output argument).
row = [];
for i = 1:size(minmax,1)
    if strcmpi(minmax(i,1),curr_ch)  %found it, the first match wins
        row = i;
        break
    end
end
if isempty(row)
    error('ur_analysis:gen_normarray:channelNotFound',...
        'Channel "%s" has no entry in the min/max exposure table.',char(curr_ch));
end
switch minormax
    case 'min'
        norm_factor = curr_exp/minmax{row,3};
    otherwise %max
        norm_factor = minmax{row,2}/curr_exp;
end
norm_array = repmat(norm_factor,array_sz,1);
%----------------------------------------------------------------------------------------------------
function curr_exp = find_exp(exp,cond,layer,region,datenum,curr_ch)
%Look up the exposure recorded for one channel.
%Input:     exp = nested structure walked as
%               exp.(['date_' date]).(condition).(layer).(region); the leaf
%               is a cell table whose first column is the channel name and
%               second column the exposure
%           cond, layer, region = field names to follow (case insensitive)
%           datenum = cell whose first element is the date text appended
%               to 'date_'
%           curr_ch = channel name to find (case insensitive)
%Output:    curr_exp = the exposure, or [] when nothing matches (a message
%               is then displayed)
curr_exp = [];
date_key = ['date_',datenum{1}];
date_names = fieldnames(exp);  %e.g., date_20110908...
for di = reshape(find(strcmpi(date_names,date_key)),1,[])  %matching dates
    cond_tree = exp.(date_names{di});
    cond_names = fieldnames(cond_tree);
    for ci = reshape(find(strcmpi(cond_names,cond)),1,[])  %condition, e.g., KO/WT, AM/PM
        layer_tree = cond_tree.(cond_names{ci});
        layer_names = fieldnames(layer_tree);
        for li = reshape(find(strcmpi(layer_names,layer)),1,[])  %layer, e.g., Layer 4, Layer 5...
            region_tree = layer_tree.(layer_names{li});
            region_names = fieldnames(region_tree);
            for ri = reshape(find(strcmpi(region_names,region)),1,[])  %region, e.g., R1, R2...
                ch_table = region_tree.(region_names{ri});
                %take the first row whose name matches the channel
                row = 1;
                while row<=size(ch_table,1)
                    if strcmpi(ch_table(row,1),curr_ch)
                        curr_exp = ch_table{row,2};
                        break
                    end
                    row = row+1;
                end
            end
        end
    end
end
if isempty(curr_exp)
    disp(['[',curr_ch,']',' Field is not found in: ',datenum{1},' ',cond,' ',layer,' ',region]);
end
%----------------------------------------------------------------------------------------------------
function datenum = lookup_date(out_lookup,cond,layer,region,class,setname)
%Find the date of one dataset in the lookup table.
%Input:     out_lookup = cell table with one row per dataset; column 1 is
%               the date, columns 2-6 are compared (case insensitive)
%               against cond, layer, region, class and setname in turn
%           cond = condition, e.g., KO/WT, AM/PM
%           layer = layer, e.g., Layer 4, Layer 5...
%           region = region, e.g., R1, R2...
%           class = class, e.g., total, VGluT1, VGluT2
%           setname = dataset label
%Output:    datenum = 1x1 cell holding the date of the first matching row
%Errors:    ur_analysis:lookup_date:notFound when no row matches
%           (previously this surfaced as an unassigned output argument).
for i = 1:size(out_lookup,1)
    if strcmpi(out_lookup(i,2),cond) && strcmpi(out_lookup(i,3),layer) && ...
            strcmpi(out_lookup(i,4),region) && strcmpi(out_lookup(i,5),class) && ...
            strcmpi(out_lookup(i,6),setname)
        %OK we found it
        datenum = out_lookup(i,1);
        return
    end
end
error('ur_analysis:lookup_date:notFound',...
    'No lookup entry for condition "%s", layer "%s", region "%s", class "%s", set "%s".',...
    char(cond),char(layer),char(region),char(class),char(setname));
%----------------------------------------------------------------------------------------------------
function [] = process_allprop(allprop,dir_root,dataset_name_all,norm_type,output_dir)
%Collate matching fields across the root conditions of allprop, optionally
%quantile normalize them together, compute summary statistics and save them
%as csv files.
%Input:     allprop = nested structure walked as
%               allprop.(root).(group).(field), e.g., root = KO/WT or
%               AM/PM, group = Layer23, Layer4...; each leaf is indexed
%               here as (measure row, property column, sample)
%           dir_root = directory that contains one folder per root field
%           dataset_name_all = cell array; dataset_name_all{p,k} holds the
%               sample names of root p in group k
%           norm_type = 'quantile' normalizes every property across all
%               collated samples with quantilenormloco; any other value
%               leaves the data as is
%           output_dir = name of the folder created inside
%               dir_root/root/group to receive the csv files
%Output:    none; files are written through sav2csv:
%               col_sum.propN.csv (all statistics) and
%               ave_/std_/count_/median_/mad_col_sum.propN.csv
%Notes:     Only fields of the first root that have a case-insensitive
%           match in at least one other root are processed.
%           The warning configuration found on entry is restored on exit
%           (it used to be forced to 'on' for every warning).
%           A group without any matched field, and a root without data in
%           a group, are skipped instead of failing on an index error.
root_flds = fieldnames(allprop);  %get the root fieldnames, should be KO/WT or AM/PM
n_root = size(root_flds,1);
%parse the structure
second_flds = cell(1,n_root);
third_flds = {};  %stays empty when allprop has no fields
for i = 1:n_root %go through each root field
    second_flds{i} = fieldnames(allprop.(root_flds{i}));  %group names, e.g., Layer23...Layer4...
    for j = 1:size(second_flds{i},1)
        third_flds{i,j} = fieldnames(allprop.(root_flds{i}).(second_flds{i}{j}));  %final fieldnames
    end
end

%remember the caller's warning configuration and put it back however this
%function exits
warn_state = warning;
restore_warn = onCleanup(@() warning(warn_state));
warning('OFF','last');

%row offset s inside each block of five statistics -> file name prefix
stat_prefix = {'ave','std','count','median','mad'};
n_stat = numel(stat_prefix);

%now collate data across the fields for normalization and analysis
for k = 1:size(third_flds,2)  %go through each cross field comparison group, e.g. Layer23...
    data_col_all = [];  %initialize
    col_name = [];
    decon_idx = [];     %samples contributed by each root, set by matched fields only
    for m = 1:size(third_flds{1,k},1)  %now look through each field individually
        fld_name = third_flds{1,k}{m};
        prop_tmp = allprop.(root_flds{1}).(second_flds{1}{k}).(fld_name);  %grab the data
        decon_tmp = size(prop_tmp,3); %initiate the deconstruction matrix
        %now see if there is a matching field in the other roots,
        %e.g., KO/WT...
        [match_data] = match_fields(third_flds,third_flds{1,k}(m),k);
        if isempty(match_data) || max(match_data)<=0  %no match anywhere, skip this field
            continue
        end
        for l = 1:size(match_data,2) %group across each root field e.g., KO/WT...
            if match_data(l)==0  %There is no match in the current root
                decon_tmp(l+1) = 0;
            else  %there is a match
                [prop_tmp,decon_tmp(l+1)] = match_catz(prop_tmp,allprop.(root_flds{l+1}).(second_flds{l+1}{k}).(third_flds{l+1,k}{match_data(l)}));
            end
        end
        %keep the layout of a field that was actually collated; an
        %unmatched field must not overwrite it
        decon_idx = decon_tmp;
        %OK now the data should be collated, each property in x,
        %each measure in y and each sample in z.  We want to
        %normalize across z.
        y = size(prop_tmp,1);
        x = size(prop_tmp,2);
        z = size(prop_tmp,3);
        switch norm_type
            case 'quantile'
                qprop_tmp = nan(size(prop_tmp));
                %normalize across each property(x), then store in array
                for n = 1:x
                    tmp = quantilenormloco(reshape(prop_tmp(:,n,:),[y,z,1]));
                    qprop_tmp(:,n,:) = reshape(tmp,[y,1,z]);
                end
                prop_tmp = qprop_tmp;
        end
        %prepare the observation naming array
        col_name_tmp = {[fld_name,'.ave'];[fld_name,'.std'];[fld_name,'.size'];[fld_name,'.med'];[fld_name,'.mad']};
        %do some math on the data; start from a fresh array so nothing is
        %carried over from the previous field or group
        data_col_tmp = nan(n_stat,x,z);
        for o = 1:z
            page = prop_tmp(:,:,o);
            data_col_tmp(1,:,o) = nanmean(page,1);
            data_col_tmp(2,:,o) = nanstd(page,1);   %second argument is the normalization flag, not the dimension
            %NOTE(review): this is the row count of the padded array, so
            %NaN padding rows are included in the count - confirm intent
            data_col_tmp(3,:,o) = size(page,1);
            data_col_tmp(4,:,o) = nanmedian(page,1);
            data_col_tmp(5,:,o) = mad(page,1,1);
        end
        %put collated data together
        data_col_all = vertcat(data_col_all,data_col_tmp);
        col_name = vertcat(col_name,col_name_tmp);
    end
    %now collation is complete
    if isempty(decon_idx)  %nothing was matched in this group
        continue
    end
    %save out the data
    b = 0; %initiate
    for p = 1:size(third_flds,1)  %split out and save out.
        %create indexes for deconstructing the collated data
        a = b+1;  %index for the start of the field.
        b = b+decon_idx(p); %index for the end of the field
        if decon_idx(p)==0  %this root contributed no samples to the group
            continue
        end
        %generate the directory to be saved into
        dir_tmp = [dir_root,filesep,root_flds{p},filesep,second_flds{p}{k}];
        mkdir(dir_tmp,output_dir)
        out_path = [dir_tmp,filesep,output_dir];
        dataset_name = dataset_name_all{p,k};
        %now parse the data structure
        data_col = data_col_all(:,:,a:b);
        for q = 1:size(data_col,2)     %step through the properties
            col_cache = reshape(data_col(:,q,1:decon_idx(p)),[size(data_col,1),decon_idx(p),1]);
            %every statistic of every field in one file
            col_data = dataset({col_cache,dataset_name{:}},'obsname',col_name);
            sav2csv(col_data,['col_sum.prop',num2str(q),'.csv'],out_path);
            %one file per statistic, easier to collate manually
            for s = 1:n_stat
                stat_rows = col_cache(s:n_stat:end,:);
                stat_names = col_name(s:n_stat:end,:);
                stat_data = dataset({stat_rows,dataset_name{:}},'obsname',stat_names);
                sav2csv(stat_data,[stat_prefix{s},'_col_sum.prop',num2str(q),'.csv'],out_path);
            end
        end
    end
end
%---------------------------------------------------------------------------------------------------
%This little function scans through the fields to see if there are matches
%across the roots, e.g., KO/WT; col represents the column, i.e., the second
%field, e.g., Layer23...
function [match_data] = match_fields(third_flds,curr_fld,col)
%Locate curr_fld in the field lists of every root after the first.
%Input:     third_flds = cell array of field-name lists, one row per root
%               and one column per group
%           curr_fld = 1x1 cell holding a field name taken from the first
%               root, i.e., row 1 of third_flds
%           col = column (group) of third_flds to search
%Output:    match_data = 1-by-(number of roots - 1) array; element i-1 is
%               the position of the first case-insensitive match in
%               third_flds{i,col}, or 0 when there is none.  It is 1-by-0
%               when there is only one root.
n_root = size(third_flds,1);
match_data = zeros(1,max(n_root-1,0));  %defined even when there is a single root
for i = 2:n_root %step through the remaining root fields
    fields_tmp = third_flds{i,col};
    match_srch = strcmpi(curr_fld,fields_tmp);
    %keep only the first hit: names that differ only by case would
    %otherwise return several positions and break the scalar assignment
    hit = find(match_srch,1);
    if ~isempty(hit)
        match_data(i-1) = hit;  %store the location of the match
    end
end
%---------------------------------------------------------------------------------------------------
%This little function pads two matrices to the same number of rows and concatenates them along z
function [data_out,data2_sz] = match_catz(data1,data2)
%Stack data2 behind data1 along the third dimension, padding the shorter
%input with NaN rows first so both have the same number of rows.
%Output:    data_out = the concatenated array
%           data2_sz = number of pages (third dimension) contributed by
%               data2, kept for later deconstruction of data_out
data2_sz = size(data2,3);  %padding only adds rows, so take it up front
row_gap = size(data1,1)-size(data2,1);
if row_gap>0        %data2 is shorter, extend it
    data2 = cat(1,data2,nan(row_gap,size(data2,2),size(data2,3)));
elseif row_gap<0    %data1 is shorter, extend it
    data1 = cat(1,data1,nan(-row_gap,size(data1,2),size(data1,3)));
end
data_out = cat(3,data1,data2);
%-----------------------------------------------------------------------------------------------------------------------------
function [filenames_out,uq_names,size_array,uq_headers] = dir_sort(dir_tmp)
%Group the files of a directory by the key word embedded in their names.
%File names are split at punctuation (except '&') and white space.  The
%key word of a file runs from just after its first delimiter (or after the
%leading numeric part, when there is one) up to the end of its last
%non-numeric token; the header is the text before the first delimiter.
%Input:     dir_tmp = directory to scan (sub-directories are ignored)
%Output:    filenames_out = 1-by-n cell, one char matrix of file names per
%               unique key word
%           uq_names = n-by-1 cell of unique key words (case insensitive)
%           size_array = 1-by-n number of files in each group
%           uq_headers = unique headers
%           All four outputs are empty when the directory holds no files.
%Errors:    ur_analysis:dir_sort:badFilename when a file name has fewer
%           than two delimiters and therefore cannot be split.
filenames_out = {};
uq_names = {};
size_array = [];
uq_headers = {};
dir_struct = dir(dir_tmp);  %grab the directory information
idx = [dir_struct.isdir];   %grab all of the isdir numbers
names = {dir_struct.name};   %grab all of the names in the root
filenames = names(~idx);
n_files = size(filenames,2);
if n_files==0   %nothing to sort
    return
end
f_tmp = cell(n_files,1);    %key word of each file
hdr_tmp = cell(n_files,1);  %header of each file
for i = 1:n_files      %step through each filename and pull the wanted word
    filename_tmp = filenames{i};
    %delimiters are punctuation, with & exempted, plus the spaces
    is_delim = (isstrprop(filename_tmp,'punct') & filename_tmp~='&') | isstrprop(filename_tmp,'wspace');
    y = find(is_delim);  %get the positions
    if size(y,2)<2
        error('ur_analysis:dir_sort:badFilename',...
            'Cannot parse "%s": at least two delimiters (punctuation or white space) are required.',filename_tmp);
    end
    %walk backwards from the last token until a non-numeric one is found
    a = 1;  %initiate
    b = 0;
    tmp = filename_tmp(1,y(end-a)+1:y(end-b)-1);
    while is_numeric_token(tmp)   %numbers or vertices file, push forward one and go
        a = a+1;
        b = b+1;
        if size(y,2)-a==0   %ran out of tokens
            break
        end
        tmp = filename_tmp(1,y(end-a)+1:y(end-b)-1);
    end
    %now do the same for the front
    a = 0;
    tmp = filename_tmp(1,1:y(1+a)-1);
    while is_numeric_token(tmp)&&size(y,2)~=a+1
        a = a+1;
        tmp = filename_tmp(1,1:y(1+a)-1);
    end
    if a==0
        f_tmp{i} = filename_tmp(1,y(1)+1:y(end-b)-1);
    else
        f_tmp{i} = filename_tmp(1,y(a)+1:y(end-b)-1);
    end
    %grab the headers
    hdr_tmp{i} = filename_tmp(1:y(1)-1);
end
uq_names = unique(f_tmp);    %how many unique words are there
%make sure there are no duplicates with different cases, keep the first
dup_idx = [];   %index of removal
for l = 1:size(uq_names,1)
    for m = l+1:size(uq_names,1)
        if strcmpi(uq_names{l},uq_names{m})
            dup_idx = [dup_idx;m];
        end
    end
end
uq_names(unique(dup_idx),:) = [];    %remove
%collect the file names that belong to each key word
n_groups = size(uq_names,1);
filenames_out = cell(1,n_groups);
size_array = zeros(1,n_groups);
for j = 1:n_groups
    members = strcmpi(uq_names{j},f_tmp);   %case insensitive
    filenames_out{j} = char(filenames(members));
    size_array(j) = size(filenames_out{j},1);     %get the size of each file group
end
uq_headers = unique(hdr_tmp);

function tf = is_numeric_token(str)
%True when str2num can turn str into a number AND str is made only of
%characters that belong to a numeric expression.  str2num evaluates its
%input, so text taken from a file name is screened first: words such as
%'pi', 'i' or 'rand' are reported as non-numeric instead of being evaluated.
tf = false;
if isempty(str)
    return
end
if isempty(regexp(str,'^[\d\s.,;+\-*/^()\[\]eE]+$','once'))  %only numeric-expression characters
    return
end
if isempty(regexp(str,'\d','once'))  %needs at least one digit
    return
end
if ~isempty(regexp(str,'(^|[^\d.])[eE]|[eE]($|[^\d+\-])','once'))  %e/E only as an exponent marker
    return
end
tf = ~isempty(str2num(str));
%-----------------------------------------------------------------------------------------------------------------------------
function [norm_data] = quantilenormloco(data)
%Quantile normalize the columns of data (quantilenorm with 'Median' set to
%1) while protecting the call from columns it cannot use: columns with at
%most one non-NaN value are left out of the normalization and returned
%unchanged in their original position.
valid_count = sum(~isnan(data),1);   %non-NaN entries in every column
sparse_cols = valid_count<=1;        %empty or singleton columns
if ~any(sparse_cols)    %no problems go on
    norm_data = quantilenorm(data,'Median',1);
    return
end
norm_data = data;       %the sparse columns keep their original content
usable = data(:,~sparse_cols);
if ~isempty(usable)     %if all is gone, don't do it
    norm_data(:,~sparse_cols) = quantilenorm(usable,'Median',1);
end